# Count the occurrences of each word in test.txt and print the results to the screen.

from pyspark import SparkContext, SparkConf

# Configure a local Spark application that uses all available cores.
conf = SparkConf().setAppName("WordCount").setMaster("local[*]")
sc = SparkContext(conf=conf)

try:
    # Read the input file; each RDD element is one line of text.
    text = sc.textFile("/Users/lzy/Documents/workspace/python/study/practice/pyspark学习/test.txt")

    # Split every line on single spaces and flatten into one RDD of words.
    words = text.flatMap(lambda line: line.split(" "))

    # Classic map/reduce word count: pair each word with 1, then sum per key.
    counts = words.map(lambda word: (word, 1)).reduceByKey(lambda a, b: a + b)

    # NOTE: foreach runs on the executors; the output reaches this terminal
    # only because the master is local[*].
    counts.foreach(lambda pair: print(pair))
finally:
    # Always release the SparkContext, even if the job above fails.
    sc.stop()